R for faculty, Fall 2024
RThere are three main options for doing graphics in R:
graphics library (preinstalled).ggplot2 library, which is more flexible but harder to use.lattice library, which I am unfamiliar with.We focus on base graphics.
Change the color of the elements to cyan.
Remove all text from the plot, leaving only the points.
Make a scatterplot, with the same data, using triangles. To do this, find the pch code for a triangle (google, e.g., pch r there are multiple!) and make a plot.
Use rownames(mtcars) to color your three favorite cars in blue.
My favorite cars are Hornet 4 Drive and Hornet Sportabout. They have indices 4 and 5.
colors <- rep("black", length(mtcars$mpg))
colors[c(14, 28)] <- "red"
cexes <- rep(1.6, length(mtcars$mpg))
cexes[c(14, 28)] <- 4
plot(x = mtcars$mpg,
y = mtcars$disp,
xlab = "Miles per gallon",
ylab = "Displacement",
main = "Important information concerning automobiles",
col = colors,
pch = 20,
cex = cexes)Change the point style of the red elements to a filled diamond.
colors <- rep("black", length(mtcars$mpg))
colors[c(14, 28)] <- "red"
cexes <- rep(1.6, length(mtcars$mpg))
cexes[c(14, 28)] <- 4
pchs <- rep(20, length(mtcars$mpg))
pchs[c(14,28)] <- 18
plot(x = mtcars$mpg,
y = mtcars$disp,
xlab = "Miles per gallon",
ylab = "Displacement",
main = "Important information concerning automobiles",
col = colors,
pch = pchs,
cex = cexes)colors <- rep("black", length(mtcars$mpg))
colors[c(14, 28)] <- "red"
cexes <- rep(1.6, length(mtcars$mpg))
cexes[c(14, 28)] <- 4
pchs <- rep(20, length(mtcars$mpg))
pchs[c(14,28)] <- 18
plot(x = mtcars$mpg,
y = mtcars$disp,
xlab = "Miles per gallon",
ylab = "Displacement",
main = "Important information concerning automobiles",
col = colors,
pch = pchs,
cex = cexes)
points(x = mtcars$mpg[-c(14, 28)],
y = mtcars$disp[-c(14, 28)],
pch = 20,
col = "black",
cex = 1.6)xlim and ylimWhat happens here?
plot(x = mtcars$mpg[c(14, 28)],
y = mtcars$disp[c(14, 28)],
xlab = "Miles per gallon",
ylab = "Displacement",
main = "Important information",
col = "red",
pch = 18,
cex = 4,
xlim = c(min(mtcars$mpg), max(mtcars$mpg)),
ylim = c(min(mtcars$disp), max(mtcars$disp)))
points(x = mtcars$mpg[-c(14, 28)],
y = mtcars$disp[-c(14, 28)],
pch = 20,
col = "black",
cex = 1.6)Transparency can be added using the rbg function. The final argument is the level of transparency (0-1), the first three are red-green-blue values between 0 and 1.
plot(x = mtcars$mpg[c(14, 28)],
y = mtcars$disp[c(14, 28)],
xlab = "Miles per gallon",
ylab = "Displacement",
main = "Important information",
col = rgb(1, 0, 0, 0.6),
pch = 18,
cex = 4,
xlim = c(min(mtcars$mpg), max(mtcars$mpg)),
ylim = c(min(mtcars$disp), max(mtcars$disp)))
points(x = mtcars$mpg[-c(14, 28)],
y = mtcars$disp[-c(14, 28)],
pch = 20,
col = "black",
cex = 1.6)Make the red points even more transparent.
plot(x = mtcars$mpg[c(14, 28)],
y = mtcars$disp[c(14, 28)],
xlab = "Miles per gallon",
ylab = "Displacement",
main = "Important information",
col = rgb(1, 0, 0, 0.1),
pch = 18,
cex = 4,
xlim = c(min(mtcars$mpg), max(mtcars$mpg)),
ylim = c(min(mtcars$disp), max(mtcars$disp)))
points(x = mtcars$mpg[-c(14, 28)],
y = mtcars$disp[-c(14, 28)],
pch = 20,
col = "black",
cex = 1.6)Use grid to add grid lines. Use this when the numbers are important, not just the shape.
The grid line at approximately (15, 300) doesn’t look good, since it’s plotted above the points. Use the point function to fix the problem.
We must plot the points again.
ablineplot(x = mtcars$mpg,
y = mtcars$disp,
xlab = "Miles per gallon",
ylab = "Displacement",
main = "Important information concerning automobiles",
col = "black",
cex = 1.6,
pch = 20,
bty = "l",
xlim = c(0, 40))
grid()
points(x = mtcars$mpg,
y = mtcars$disp,
col = "black",
cex = 1.6,
pch = 20)
abline(v = 10, lty = 1, lwd = 2, col = "red")plot(x = mtcars$mpg,
y = mtcars$disp,
xlab = "Miles per gallon",
ylab = "Displacement",
main = "Important information concerning automobiles",
col = "black",
cex = 1.6,
pch = 20,
bty = "l",
ylim = c(0, 500),
xlim = c(0, 40))
grid()
points(x = mtcars$mpg,
y = mtcars$disp,
col = "black",
cex = 1.6,
pch = 20)
abline(v = 10, lty = 1, lwd = 2, col = "red")
abline(h = 50, lty = 1, lwd = 2, col = "blue")Inspect the documentation of abline and add a purple line with slope 1000 and intercept -50.
plot(x = mtcars$mpg,
y = mtcars$disp,
xlab = "Miles per gallon",
ylab = "Displacement",
main = "Important information concerning automobiles",
col = "black",
cex = 1.6,
pch = 20,
bty = "l",
ylim = c(0, 500),
xlim = c(0, 40))
grid()
points(x = mtcars$mpg,
y = mtcars$disp,
col = "black",
cex = 1.6,
pch = 20)
abline(v = 10, lty = 1, lwd = 2, col = "red")
abline(h = 50, lty = 1, lwd = 2, col = "blue")
abline(a = 1000, b = -50, lwd = 2, col = "purple")The text function adds labels to a plot.
plot(x = mtcars$mpg[c(14, 28)],
y = mtcars$disp[c(14, 28)],
xlab = "Miles per gallon",
ylab = "Displacement",
main = "Important information concerning automobiles",
col = rgb(1, 0, 0, 0.6),
pch = 18,
cex = 4,
xlim = c(min(mtcars$mpg), max(mtcars$mpg)),
ylim = c(min(mtcars$disp), max(mtcars$disp)))
points(x = mtcars$mpg[-c(14, 28)],
y = mtcars$disp[-c(14, 28)],
pch = 20,
col = "black",
cex = 1.6)
text(x = mtcars$mpg[c(14, 28)],
y = mtcars$disp[c(14, 28)],
labels = rownames(mtcars)[c(14, 28)])The labels overlap the points. We must fix this manually.
plot(x = mtcars$mpg[c(14, 28)],
y = mtcars$disp[c(14, 28)],
xlab = "Miles per gallon",
ylab = "Displacement",
main = "Important information concerning automobiles",
col = rgb(1, 0, 0, 0.6),
pch = 18,
cex = 4,
xlim = c(min(mtcars$mpg), max(mtcars$mpg)),
ylim = c(min(mtcars$disp), max(mtcars$disp)),
bty = "l")
points(x = mtcars$mpg[-c(14, 28)],
y = mtcars$disp[-c(14, 28)],
pch = 20,
col = "black",
cex = 1.6)
text(x = mtcars$mpg[c(14, 28)],
y = mtcars$disp[c(14, 28)],
labels = rownames(mtcars)[c(14, 28)],
pos = c(1, 3),
offset = c(1,1))The loess function estimates a scatterplot smoother.
Let’s use a line plot.
We must sort the miles per gallons to get a good plot.
plot(x = mtcars$mpg[c(14, 28)],
y = mtcars$disp[c(14, 28)],
xlab = "Miles per gallon",
ylab = "Displacement",
main = "Important information concerning automobiles",
col = rgb(1, 0, 0, 0.6),
pch = 18,
cex = 4,
xlim = c(min(mtcars$mpg), max(mtcars$mpg)),
ylim = c(min(mtcars$disp), max(mtcars$disp)),
bty = "l")
points(x = mtcars$mpg[-c(14, 28)],
y = mtcars$disp[-c(14, 28)],
pch = 20,
col = "black",
cex = 1.6)
text(x = mtcars$mpg[c(14, 28)],
y = mtcars$disp[c(14, 28)],
labels = rownames(mtcars)[c(14, 28)],
pos = c(1, 3),
offset = c(1,1))
x <- sort(mtcars$mpg)
lines(x, predict(model, x), lwd = 2, lty = 3, col = "grey")The scatterplot smoother is plotted above the data points, crearting an ugly effec (see mpg ~ 30). Modify the code so the points are plotted on top of the smoother.
model <- loess(disp ~ mpg, data = mtcars)
plot(x = mtcars$mpg[c(14, 28)],
y = mtcars$disp[c(14, 28)],
xlab = "Miles per gallon",
ylab = "Displacement",
main = "Important information concerning automobiles",
col = rgb(1, 0, 0, 0.6),
pch = 18,
cex = 4,
xlim = c(min(mtcars$mpg), max(mtcars$mpg)),
ylim = c(min(mtcars$disp), max(mtcars$disp)),
bty = "l")
points(x = mtcars$mpg[-c(14, 28)],
y = mtcars$disp[-c(14, 28)],
pch = 20,
col = "black",
cex = 1.6)
text(x = mtcars$mpg[c(14, 28)],
y = mtcars$disp[c(14, 28)],
labels = rownames(mtcars)[c(14, 28)],
pos = c(1, 3),
offset = c(1,1))
x <- sort(mtcars$mpg)
lines(x, predict(model, x), lwd = 2, lty = 3, col = "grey")
legend("topright", legend = c("Scatterplot smoother", "Unimportant cars", "Important cars"), col = c("grey", "black", "red"))model <- loess(disp ~ mpg, data = mtcars)
plot(x = mtcars$mpg[c(14, 28)],
y = mtcars$disp[c(14, 28)],
xlab = "Miles per gallon",
ylab = "Displacement",
main = "Important information concerning automobiles",
col = rgb(1, 0, 0, 0.6),
pch = 18,
cex = 4,
xlim = c(min(mtcars$mpg), max(mtcars$mpg)),
ylim = c(min(mtcars$disp), max(mtcars$disp)),
bty = "l")
points(x = mtcars$mpg[-c(14, 28)],
y = mtcars$disp[-c(14, 28)],
pch = 20,
col = "black",
cex = 1.6)
text(x = mtcars$mpg[c(14, 28)],
y = mtcars$disp[c(14, 28)],
labels = rownames(mtcars)[c(14, 28)],
pos = c(1, 3),
offset = c(1,1))
x <- sort(mtcars$mpg)
lines(x, predict(model, x), lwd = 2, lty = 3, col = "grey")
legend("topright", legend = c("Scatterplot smoother", "Unimportant cars", "Important cars"), col = c("grey", "black", "red"))plot(x = mtcars$mpg[c(14, 28)],
y = mtcars$disp[c(14, 28)],
xlab = "Miles per gallon",
ylab = "Displacement",
main = "Important information concerning automobiles",
col = rgb(1, 0, 0, 0.6),
pch = 18,
cex = 4,
xlim = c(min(mtcars$mpg), max(mtcars$mpg)),
ylim = c(min(mtcars$disp), max(mtcars$disp)),
bty = "l")
points(x = mtcars$mpg[-c(14, 28)],
y = mtcars$disp[-c(14, 28)],
pch = 20,
col = "black",
cex = 1.6)
text(x = mtcars$mpg[c(14, 28)],
y = mtcars$disp[c(14, 28)],
labels = rownames(mtcars)[c(14, 28)],
pos = c(1, 3),
offset = c(1,1))
x <- sort(mtcars$mpg)
lines(x, predict(model, x), lwd = 2, lty = 3, col = "grey")
legend("topright",
legend = c("Scatterplot smoother", "Unimportant cars", "Important cars"),
col = c("grey", "black", "red"),
lwd = c(2, NA, NA),
pch = c(NA, 20, 16),
lty = c(3, NA, NA))plot(x = mtcars$mpg[c(14, 28)],
y = mtcars$disp[c(14, 28)],
xlab = "Miles per gallon",
ylab = "Displacement",
main = "Important information concerning automobiles",
sub = "One gallon equals ~3.8 litres",
col = rgb(1, 0, 0, 0.6),
pch = 18,
cex = 4,
xlim = c(min(mtcars$mpg), max(mtcars$mpg)),
ylim = c(min(mtcars$disp), max(mtcars$disp)),
bty = "l")
points(x = mtcars$mpg[-c(14, 28)],
y = mtcars$disp[-c(14, 28)],
pch = 20,
col = "black",
cex = 1.6)
text(x = mtcars$mpg[c(14, 28)],
y = mtcars$disp[c(14, 28)],
labels = rownames(mtcars)[c(14, 28)],
pos = c(1, 3),
offset = c(1,1))
x <- sort(mtcars$mpg)
lines(x, predict(model, x), lwd = 2, lty = 3, col = "grey")
legend("topright",
legend = c("Scatterplot smoother", "Unimportant cars", "Important cars"),
col = c("grey", "black", rgb(1, 0, 0, 0.6)),
lwd = c(2, NA, NA),
pch = c(NA, 20, 18),
lty = c(3, NA, NA),
pt.cex = c(0, 1, 4/1.6),
bty = "n")Modify the code so that the “Scatterplot smoother” appears at the bottom of the legend, not the top. The important cars should be on the top.
plot(x = mtcars$mpg[c(14, 28)],
y = mtcars$disp[c(14, 28)],
xlab = "Miles per gallon",
ylab = "Displacement",
main = "Important information concerning automobiles",
sub = "One gallon equals ~3.8 litres",
col = rgb(1, 0, 0, 0.6),
pch = 18,
cex = 4,
xlim = c(min(mtcars$mpg), max(mtcars$mpg)),
ylim = c(min(mtcars$disp), max(mtcars$disp)),
bty = "l")
points(x = mtcars$mpg[-c(14, 28)],
y = mtcars$disp[-c(14, 28)],
pch = 20,
col = "black",
cex = 1.6)
text(x = mtcars$mpg[c(14, 28)],
y = mtcars$disp[c(14, 28)],
labels = rownames(mtcars)[c(14, 28)],
pos = c(1, 3),
offset = c(1,1))
x <- sort(mtcars$mpg)
lines(x, predict(model, x), lwd = 2, lty = 3, col = "grey")
legend("topright",
legend = c("Important cars", "Unimportant cars", "Scatterplot smoother"),
col = c(rgb(1, 0, 0, 0.6), "black", "grey"),
lwd = c(NA, NA, 2),
pch = c(18, 20, NA),
lty = c(NA, NA, 3),
pt.cex = c(4/1.6, 1, 0),
bty = "n")pdf(file = "cars.pdf", width = 8, height = 8)
plot(x = mtcars$mpg[c(14, 28)],
y = mtcars$disp[c(14, 28)],
xlab = "Miles per gallon",
ylab = "Displacement",
main = "Important information concerning automobiles",
sub = "One gallon equals ~3.8 litres",
col = rgb(1, 0, 0, 0.6),
pch = 18,
cex = 4,
xlim = c(min(mtcars$mpg), max(mtcars$mpg)),
ylim = c(min(mtcars$disp), max(mtcars$disp)),
bty = "l")
points(x = mtcars$mpg[-c(14, 28)],
y = mtcars$disp[-c(14, 28)],
pch = 20,
col = "black",
cex = 1.6)
text(x = mtcars$mpg[c(14, 28)],
y = mtcars$disp[c(14, 28)],
labels = rownames(mtcars)[c(14, 28)],
pos = c(1, 3),
offset = c(1,1))
x <- sort(mtcars$mpg)
lines(x, predict(model, x), lwd = 2, lty = 3, col = "grey")
legend("topright",
legend = c("Important cars", "Unimportant cars", "Scatterplot smoother"),
col = c(rgb(1, 0, 0, 0.6), "black", "grey"),
lwd = c(NA, NA, 2),
pch = c(18, 20, NA),
lty = c(NA, NA, 3),
pt.cex = c(4/1.6, 1, 0),
bty = "n")
dev.off()pdf(file = "cars.pdf", width = 16, height = 8)
plot(x = mtcars$mpg[c(14, 28)],
y = mtcars$disp[c(14, 28)],
xlab = "Miles per gallon",
ylab = "Displacement",
sub = "One gallon equals ~3.8 litres",
col = rgb(1, 0, 0, 0.6),
pch = 18,
cex = 4,
xlim = c(min(mtcars$mpg), max(mtcars$mpg)),
ylim = c(min(mtcars$disp), max(mtcars$disp)),
cex.lab = 1.5,
cex.main = 2,
bty = "l")
points(x = mtcars$mpg[-c(14, 28)],
y = mtcars$disp[-c(14, 28)],
pch = 20,
col = "black",
cex = 1.6)
text(x = mtcars$mpg[c(14, 28)],
y = mtcars$disp[c(14, 28)],
labels = rownames(mtcars)[c(14, 28)],
pos = c(1, 3),
offset = c(1,1))
x <- sort(mtcars$mpg)
lines(x, predict(model, x), lwd = 3, lty = 3, col = "grey")
legend("topright",
legend = c("Scatterplot smoother", "Unimportant cars", "Important cars"),
col = c("grey", "black", rgb(1, 0, 0, 0.6)),
lwd = c(3, NA, NA),
pch = c(NA, 20, 18),
lty = c(3, NA, NA),
pt.cex = c(0, 1, 4/1.6),
cex = c(1.3,1.3,1.3),
bty = "n")
dev.off()Some journals do not accept pdf graphs.
BMP, JPEG, PNG and TIFF. Avoid these if possible, as they are lossy.eps is a PostScript format that is often accepted.svg is probably the preferred format.xlabylabmainlinespointsaddcolortypeltypchThere are many satistical plots, including:
Change the color of hp to green and displacement to yellow.
Add an informative title to the plot. Change the colors back to something pleasant.
ggplot2The package ggplot2 is a better option for bar graphs and count graphs. See, e.g., this page.
Change the color of the histogram to white. Make the histogram a density histogram, i.e., the integral should integrate to 1. (By default, it shows counts.)
The default value for breaks (the number of bins + 1) is too small by default, as R uses an outdated method for calculating the breaks. Experiment with different breaks and choose a “pleasing” one.
We are sometimes interested in evaluating wether a dataset is approximately normal. We can do this visually (but unreliably!) by plotting a normal density curve on top of a histogram.
mu <- mean(mtcars$mpg)
sigma <- sd(mtcars$mpg)
x <- seq(min(mtcars$mpg), max(mtcars$mpg), length.out = 100)
x [1] 10.40000 10.63737 10.87475 11.11212 11.34949 11.58687 11.82424 12.06162
[9] 12.29899 12.53636 12.77374 13.01111 13.24848 13.48586 13.72323 13.96061
[17] 14.19798 14.43535 14.67273 14.91010 15.14747 15.38485 15.62222 15.85960
[25] 16.09697 16.33434 16.57172 16.80909 17.04646 17.28384 17.52121 17.75859
[33] 17.99596 18.23333 18.47071 18.70808 18.94545 19.18283 19.42020 19.65758
[41] 19.89495 20.13232 20.36970 20.60707 20.84444 21.08182 21.31919 21.55657
[49] 21.79394 22.03131 22.26869 22.50606 22.74343 22.98081 23.21818 23.45556
[57] 23.69293 23.93030 24.16768 24.40505 24.64242 24.87980 25.11717 25.35455
[65] 25.59192 25.82929 26.06667 26.30404 26.54141 26.77879 27.01616 27.25354
[73] 27.49091 27.72828 27.96566 28.20303 28.44040 28.67778 28.91515 29.15253
[81] 29.38990 29.62727 29.86465 30.10202 30.33939 30.57677 30.81414 31.05152
[89] 31.28889 31.52626 31.76364 32.00101 32.23838 32.47576 32.71313 32.95051
[97] 33.18788 33.42525 33.66263 33.90000
Using lines, add the normal density plot to the histogram. Be sure to choose a nice color and line type (lty)!
The Shapiro–Wilk test is among the most widely used tests of normality. Find the Shapiro–Wilk test in R and evaluate the normality of mtcars$mpg formally.
You put multiple plots in the same window using par(mfrow())
par(mfrow = c(2,2))
hist(mtcars$mpg, xlab = "Miles per gallon", col = "white", main = NULL, freq = FALSE, breaks = 10)
rug(mtcars$mpg)
lines(x, dnorm(x, mu, sigma), lty = 3, col = "black", lwd = 2)
qqnorm(mtcars$mpg, main = NULL, bty = "l")
qqline(mtcars$mpg)
boxplot(mtcars$mpg, xlab = "Miles per gallon")
plot(mtcars$mpg, mtcars$disp, xlab = "Miles per gallon", ylab = "Displacement", pch = 20)plot is genericThe plot function is generic. It can be applied to many different objects, with sometimes unpredictable behaviour!
property_type buyer_type
1 flat single male
2 flat single male
3 flat single male
4 flat single male
5 flat single male
6 flat single male
To find the documentation of a generic, you must know the class of the object you use.
Call plot(dnorm, -5, 5). What does the plot function do now? Find its documentation.
Look up plot.function.